Setup

Load R libraries

library(data.table)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(limma)
library(biomaRt)
library(fgsea)
library(goseq)

theme_set(theme_classic())

cell_type_name = params$cell_type_name
graph_weight = params$graph_weight

cell_type_name
## [1] "cd4"
graph_weight
## [1] "2.0"

Check enrichment of gene sets

Read in gene info and gene set assignments

file_tag = sprintf("%s_BL_%s", cell_type_name, graph_weight)

assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag), 
                     what = character(), sep="\n")

gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag), 
                 what = character(), sep="\n")

gene_sets = sapply(gene_sets, strsplit, USE.NAMES=FALSE, split=",")
n_genes   = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.00   20.00   20.00   20.18   21.00   22.00
length(n_genes)
## [1] 40
sort(n_genes)
##  [1] 16 18 19 19 19 19 19 19 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
## [26] 20 21 21 21 21 21 21 21 21 21 22 22 22 22 22

Find gene symbols

Find gene symbols from bioMart.

All the gene symbols that can be found in bioMart are consistent with what we have. So no need to run it.

ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"), 
                filters = "external_gene_name", 
                values = assayed_genes, 
                mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]

table(assayed_genes %in% gene_BM$external_gene_name)

t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]

table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]

Find gene symbols using the alias2Symbol function from limma.

a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  a2s[i] = ai[1]
}
## [1] "HIST1H2BC"
## [1] "H2BC5" "H2BC4"
## [1] "MPP6"
## [1] "MPHOSPH6" "PALS2"   
## [1] "MARS"
## [1] "MARS1" "SLA2" 
## [1] "SEPT2"
## [1] "SEPTIN6" "SEPTIN2"
table(is.na(a2s))
## 
## FALSE  TRUE 
##  1951    49
table(a2s == assayed_genes, useNA = 'ifany')
## 
## FALSE  TRUE  <NA> 
##    45  1906    49
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)

gene_info[sym_in_data != sym_limma,]
##      sym_in_data   sym_limma
##  1:      ADPRHL2       ADPRS
##  2:          AES        TLE5
##  3:     C12orf45    NOPCHAP1
##  4:      C3orf58      DIPK2A
##  5:      C6orf99   LINC02901
##  6:        CBWD2       ZNG1B
##  7:      CXorf57        RADX
##  8:      FAM102A       EEIG1
##  9:      FAM122C      PABIR3
## 10:      FAM153C    FAM153CP
## 11:     FAM160A2      FHIP1B
## 12:        GRASP     TAMALIN
## 13:        H2AFX        H2AX
## 14:    HIST1H2AG      H2AC11
## 15:    HIST1H2BC       H2BC5
## 16:    HIST1H2BK      H2BC12
## 17:    HIST1H2BN      H2BC15
## 18:     HIST1H3A        H3C1
## 19:     HIST1H3H       H3C10
## 20:     HIST1H4C        H4C3
## 21:    HIST2H2BF      H2BC18
## 22:         LRMP       IRAG2
## 23:      MFSD14C    MFSD14CP
## 24:         MKL1       MRTFA
## 25:         MPP6    MPHOSPH6
## 26:  RNASEH1-AS1  RNASEH1-DT
## 27:        SEPT6     SEPTIN6
## 28:        SEPT9     SEPTIN9
## 29: TMEM161B-AS1 TMEM161B-DT
## 30:        ARNTL       BMAL1
## 31:     C6orf106       ILRUN
## 32:     C6orf203      MTRES1
## 33:      FAM129A      NIBAN1
## 34:     FAM160B1      FHIP2A
## 35:      FAM192A    PSME3IP1
## 36:        HEXDC        HEXD
## 37:     HIST1H1E        H1-4
## 38:     KIAA0100       BLTP2
## 39:     KIAA1551       RESF1
## 40:         LARS       LARS1
## 41:         MARS       MARS1
## 42:      PLA2G16      PLAAT3
## 43:        SEPT2     SEPTIN6
## 44:       SMIM37        MTLN
## 45:         YARS       YARS1
##      sym_in_data   sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma), gene_symbol := sym_limma]

dim(gene_info)
## [1] 2000    3
gene_info[1:5,]
##    sym_in_data sym_limma gene_symbol
## 1:       ABCD3     ABCD3       ABCD3
## 2:       ABCG1     ABCG1       ABCG1
## 3:       ABHD5     ABHD5       ABHD5
## 4:        ABI1      ABI1        ABI1
## 5:      ABLIM1    ABLIM1      ABLIM1
t1 = table(gene_info$gene_symbol)
table(t1)
## t1
##    1    2 
## 1998    1
gene_info[gene_symbol %in% names(t1)[t1 == 2],]
##    sym_in_data sym_limma gene_symbol
## 1:       SEPT6   SEPTIN6     SEPTIN6
## 2:       SEPT2   SEPTIN6     SEPTIN6
gene_info[sym_in_data == "HIST1H2BC", gene_symbol:="H2BC4"]
gene_info[sym_in_data == "SEPT6", gene_symbol:="SEPTIN6"]
gene_info[sym_in_data == "SEPT2", gene_symbol:="SEPTIN2"]

Prepare gene set information

Gene set annotations (by gene symbols) were downloaded from MSigDB website.

gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]]    = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"
gmtfile[["immune"]]   = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"

pathways = list()
for(k1 in names(gmtfile)){
  pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}

names(pathways)
## [1] "reactome" "go_bp"    "immune"
sapply(pathways, length)
## reactome    go_bp   immune 
##     1692     7647     5219

Filter gene sets for size between 10 and 500.

lapply(pathways, function(v){
  quantile(sapply(v, length), probs = seq(0, 1, 0.1), na.rm = TRUE)
})
## $reactome
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    7.0    9.0   12.0   17.0   23.0   31.0   44.0   71.8  120.9 1463.0 
## 
## $go_bp
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    6.0    8.0   10.0   14.0   19.0   29.0   46.0   80.8  183.0 1966.0 
## 
## $immune
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    5  162  193  197  199  199  200  200  200  200 1992
for(k1 in names(pathways)){
  p1 = pathways[[k1]]
  pathways[[k1]] = p1[sapply(p1, length) %in% 10:500]
}

Conduct enrichment analysis

dim(gene_info)
## [1] 2000    3
max_n2kp = 10

goseq_res = NULL

for(k in 1:length(gene_sets)){
  if(length(gene_sets[[k]]) < 10) { next }
  
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])
  
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  table(genes)
  
  pwf = nullp(genes, "hg38", "geneSymbol")

  for(k1 in names(pathways)){
    p1 = pathways[[k1]]
    res1 = goseq(pwf, "hg38", "geneSymbol", 
                 gene2cat=goseq:::reversemapping(p1))
    res1$FDR  = p.adjust(res1$over_represented_pvalue, method="BH")
    
    nD = sum(res1$FDR < 0.1)
    
    if(nD > 0){
      res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=81)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
##  [1] "CHD2"     "ENOSF1"   "GRASP"    "JPX"      "PHYH"     "TBCCD1"  
##  [7] "TCTA"     "YPEL2"    "ZNF831"   "ABHD2"    "ERICH1"   "GNPAT"   
## [13] "IRAK4"    "MORC3"    "NLRC5"    "ODF3B"    "PEX11B"   "SEC14L1" 
## [19] "TNFRSF18" "ZBP1"     "ZNF236"

## [1] 2
##  [1] "ARHGAP9"  "ARID4A"   "CCM2"     "CD96"     "CITED2"   "DGKA"    
##  [7] "HELQ"     "HIST1H3H" "IFRD1"    "ING2"     "MXD1"     "RCAN3"   
## [13] "RGCC"     "RIC1"     "SLC12A7"  "SLC8B1"   "STARD10"  "TESPA1"  
## [19] "TMEM204"  "WARS2"

## [1] 3
##  [1] "AC093323.1"  "AC245297.3"  "ERVK3-1"     "FAM122C"     "GPCPD1"     
##  [6] "LINC01215"   "LINC02265"   "MAP3K2"      "MDS2"        "PLCL1"      
## [11] "SLC25A32"    "ST7L"        "TBPL1"       "TRAV38-2DV8" "TRBV5-4"    
## [16] "TRBV6-6"     "TRBV7-3"     "TTC3"        "ZNF506"      "MIR4435-2HG"
## [21] "PMVK"

## [1] 4
##  [1] "AC007952.4" "AC025164.1" "AC087239.1" "ATP2B1-AS1" "CD82"      
##  [6] "CSKMT"      "IER2"       "ILF3-DT"    "LBH"        "LINC00861" 
## [11] "NPIPB4"     "NUP58"      "PPP1R15A"   "PRR7"       "SLC2A3"    
## [16] "TAGAP"      "WHAMM"      "Z93241.1"   "AZI2"       "CCDC43"    
## [21] "ITGAL"      "N4BP1"

## [1] 5
##  [1] "CCDC66"    "CDC42SE2"  "CEP95"     "EPHB6"     "FHIT"      "FOXN3"    
##  [7] "HIST1H2BC" "NR4A3"     "NSUN6"     "PIK3IP1"   "PLK3"      "RSRP1"    
## [13] "SENP7"     "SLC44A1"   "WSB1"      "ZFP36L1"   "ITK"       "RECK"     
## [19] "SAMD9"

## [1] 6
##  [1] "ABHD5"      "AC119396.1" "AC245014.3" "AL133415.1" "AL139246.5"
##  [6] "ARL4A"      "ATG9B"      "FBXO3"      "IL23A"      "LTB"       
## [11] "MCUB"       "MHENCR"     "NAA16"      "NOP53"      "POLD4"     
## [16] "SCML4"      "SLC25A38"   "SNRK"       "TBCC"       "TNFRSF25"  
## [21] "ZC3H12D"

## [1] 7
##  [1] "COQ7"    "IER3"    "MZF1"    "ADGRE5"  "ARL4C"   "C1orf21" "CFD"    
##  [8] "CST7"    "CX3CR1"  "DIAPH2"  "ETNK1"   "GZMM"    "MCTP2"   "NCBP3"  
## [15] "PLEK"    "PPP2R5B" "PTGDR"   "PTPN7"   "RNPEPL1" "TTC16"

## [1] 8
##  [1] "AC008105.3"  "AC025171.2"  "ADA2"        "AK5"         "ARRDC2"     
##  [6] "CHRM3-AS2"   "DNASE1"      "GSTM4"       "KLF7"        "NRIP1"      
## [11] "NUAK2"       "PDE7A"       "RNASEH1-AS1" "SLC22A17"    "TUBD1"      
## [16] "TUBE1"       "ZNF84"       "ZNF862"      "SPTLC2"      "TMEM62"

## [1] 9
##  [1] "ANKH"     "CSRNP1"   "DHRS3"    "EPHX2"    "KANSL2"   "OXLD1"   
##  [7] "PECAM1"   "ZNF10"    "C12orf75" "GON4L"    "IL4R"     "INPP5D"  
## [13] "KMT2B"    "NARF"     "PHF20"    "PHF20L1"  "SP140"    "TNFRSF4"

## [1] 10
##  [1] "MFSD14C"    "STARD7"     "WAPL"       "ABHD3"      "AC016831.7"
##  [6] "AC022916.1" "AC118549.1" "CARMIL2"    "EAPP"       "ECPAS"     
## [11] "KANSL1-AS1" "NRDC"       "PARP11"     "PUM3"       "RAPGEF1"   
## [16] "SLC20A2"    "SPATA13"    "TM2D1"      "UTP25"      "Z93930.2"

## [1] 11
##  [1] "BCL3"      "C20orf204" "DDX3Y"     "EIF1AY"    "FAAH2"     "HRH2"     
##  [7] "KDM5D"     "OSM"       "PCSK1N"    "RPS4Y1"    "S100A12"   "SLF2"     
## [13] "SOCS3"     "TTTY15"    "UTY"       "ZFYVE28"

## [1] 12
##  [1] "AC145124.1"   "AIF1"         "AKIRIN1"      "BOLA2-SMG1P6" "C12orf45"    
##  [6] "COQ10B"       "CRLF3"        "IL16"         "LINC01550"    "MMP28"       
## [11] "MYNN"         "NUDT4"        "OSER1"        "OTULINL"      "PECR"        
## [16] "TMEM161B-AS1" "TMEM71"       "YY1AP1"       "JAKMIP1"      "SERTAD3"     
## [21] "SUSD6"        "TMEM138"

## [1] 13
##  [1] "DELE1"    "DSE"      "GIMAP1"   "IER5"     "KLRB1"    "PITPNC1" 
##  [7] "RAB37"    "SLC25A33" "SNX18"    "TBC1D10C" "TRBC2"    "TTC31"   
## [13] "UCP2"     "APOBEC3G" "GNPTAB"   "GZMB"     "KLRG1"    "LAG3"    
## [19] "MATK"     "SMPD2"

## [1] 14
##  [1] "ERCC5"   "FAM8A1"  "BROX"    "CCDC88B" "CD320"   "CISH"    "CTBS"   
##  [8] "FKBP11"  "IL2RB"   "IL2RG"   "KCNAB2"  "PRDM1"   "PREP"    "PVT1"   
## [15] "REXO2"   "RNMT"    "STAT4"   "TGS1"    "TSHZ2"   "UGCG"

## [1] 15
##  [1] "AC084033.3" "AMD1"       "CAMK4"      "CCDC141"    "CXorf57"   
##  [6] "EVI2B"      "FAM102A"    "FAM117B"    "FYB1"       "INPP4B"    
## [11] "IPCEF1"     "MAML2"      "MTRNR2L12"  "SNHG12"     "TC2N"      
## [16] "TMC8"       "TMEM107"    "TSC22D2"    "ADAM19"     "SCAMP4"    
## [21] "ZNF292"

## [1] 16
##  [1] "ATAD1"    "BTN3A1"   "CHMP1B"   "CHMP7"    "COG5"     "DBP"     
##  [7] "DIP2B"    "LRRC8D"   "NSMAF"    "PARP16"   "PCNP"     "PPP1R15B"
## [13] "SPIDR"    "STX16"    "THAP6"    "TMEM245"  "TSPYL4"   "ZBTB25"  
## [19] "ZMAT1"    "BTN3A2"   "NEK9"     "TAOK3"

## [1] 17
##  [1] "AC016405.3" "AC020911.2" "AC025171.3" "AC091271.1" "AC103591.3"
##  [6] "AF213884.3" "AL121944.1" "AL357060.1" "AL451085.1" "C6orf99"   
## [11] "ID3"        "JAML"       "KLF12"      "LINC01465"  "LRRN3"     
## [16] "OSER1-DT"   "PARP8"      "PNISR"      "CNOT4"      "SDF2"      
## [21] "WDR7"

## [1] 18
##  [1] "AL627171.1"  "NDUFV2-AS1"  "ZNF490"      "ADTRP"       "ATAD2B"     
##  [6] "CEMIP2"      "CRYBG1"      "GRK2"        "IGLV3-25"    "KIAA0040"   
## [11] "LRRC58"      "MT1X"        "NORAD"       "SLFN12L"     "SNHG9"      
## [16] "TENT5C"      "THUMPD3-AS1" "TRBV6-1"     "UBALD2"      "VPS13D"

## [1] 19
##  [1] "CLK4"    "MYADM"   "ZNF821"  "BLOC1S6" "C2orf68" "CD7"     "CLASRP" 
##  [8] "IFNAR1"  "KLF9"    "NME3"    "P4HTM"   "PIK3CB"  "PIM2"    "PRMT2"  
## [15] "RNF145"  "SACS"    "SETD5"   "SLC35A2" "SNX9"    "SP140L"

## [1] 20
##  [1] "AC025159.1"  "AL135791.1"  "CERNA1"      "LDLRAP1"     "LINC02273"  
##  [6] "LRRC8C-DT"   "LYRM9"       "NSMCE3"      "PITPNA-AS1"  "SNHG7"      
## [11] "TSPOAP1-AS1" "AC020915.3"  "GPR132"      "HACD3"       "MFSD14A"    
## [16] "NNT-AS1"     "NSD3"        "TOMM70"      "TSPAN14"     "TUT4"

## [1] 21
##  [1] "ADK"     "ARL6IP1" "C6orf62" "EFCAB2"  "HEATR5B" "MBD6"    "MTERF4" 
##  [8] "ST3GAL1" "STMN3"   "TOB2"    "TPP2"    "EHMT1"   "FRY"     "FRYL"   
## [15] "ICE1"    "SDR39U1" "SYNRG"   "UBA7"    "UBE3B"   "VPS36"

## [1] 22
##  [1] "CBLL1"    "CST3"     "FAM118A"  "FCN1"     "GIMAP8"   "IGKV1-5" 
##  [7] "IGLV1-44" "NT5DC1"   "PYROXD1"  "RABL2B"   "ALOX5AP"  "GAB3"    
## [13] "IFI44"    "KIAA1551" "MX2"      "OAS2"     "PPP2R3C"  "SRP54"   
## [19] "STK10"

## [1] 23
##  [1] "RBKS"       "SUPT20H"    "AC116407.2" "APOL6"      "ARHGEF3"   
##  [6] "ATXN7L3B"   "C4orf48"    "CARD16"     "ENY2"       "GIMAP7"    
## [11] "LPIN1"      "LPIN2"      "MESD"       "PDE4B"      "PHPT1"     
## [16] "PTGER2"     "RCBTB2"     "RNF19A"     "TCAF2"      "TMEM156"

## [1] 24
##  [1] "AL118516.1" "KIF9"       "RGS10"      "UPF3A"      "BUD23"     
##  [6] "CD69"       "CYTIP"      "DDIT4"      "EFR3A"      "GIMAP4"    
## [11] "IFITM2"     "ISG20"      "KLF6"       "MTRNR2L8"   "NAA38"     
## [16] "NADSYN1"    "NBDY"       "SIGIRR"     "SMIM37"     "STK17B"

## [1] 25
##  [1] "CD38"      "IGLV2-14"  "KIAA1328"  "MBNL2"     "RPS26"     "SIMC1"    
##  [7] "XIST"      "APOL1"     "CCL5"      "CCR4"      "GZMA"      "LINC01871"
## [13] "MYO1G"     "RAP1GAP2"  "RSAD2"     "SAMD9L"    "SPON2"     "TGFBR3"   
## [19] "VCAN"

## [1] 26
##  [1] "NFYB"    "PHC1"    "ANK3"    "ARID5B"  "CCDC12"  "CREBZF"  "GCN1"   
##  [8] "IL32"    "NFE2L1"  "NQO2"    "NT5C"    "PCGF5"   "PLAC8"   "SYTL1"  
## [15] "TIPARP"  "TMED4"   "XBP1"    "ZDHHC20" "ZNF593"

## [1] 27
##  [1] "AC009061.2" "AL645728.1" "AP002360.1" "C12orf29"   "COX10"     
##  [6] "FBXO8"      "GIMAP6"     "HDHD2"      "IL6R"       "KLHL6"     
## [11] "LST1"       "MAST4"      "NBPF14"     "OTUD5"      "RETREG1"   
## [16] "SMDT1"      "TBCK"       "TRAV8-6"    "ZFP14"      "BISPR"     
## [21] "C12orf4"    "PARP4"

## [1] 28
##  [1] "CD28"    "KLF10"   "ODC1"    "SH3YL1"  "AGAP2"   "AP3M2"   "AREG"   
##  [8] "CRIP2"   "CRTC3"   "EDA"     "GABPB2"  "ICOS"    "IL21R"   "KHNYN"  
## [15] "LTBP3"   "LY96"    "NFKBIZ"  "PLEKHA2" "SMAD5"   "TOB1"    "ZNF101"

## [1] 29
##  [1] "C16orf74"  "C1GALT1"   "COQ8A"     "CTSF"      "FCMR"      "HIVEP2"   
##  [7] "LEPROT"    "LIMD2"     "LIPT1"     "MLXIP"     "ORC4"      "PCMTD2"   
## [13] "RASA2"     "SPART"     "TBC1D7"    "TMEM154"   "TRABD2A"   "TRAV23DV6"
## [19] "ZNF140"    "NDUFC1"    "TMEM175"

## [1] 30
##  [1] "DPEP2"   "FAM227B" "GGT7"    "MAP3K8"  "ZFX"     "ASCL2"   "B3GALT4"
##  [8] "CCDC112" "CD58"    "FAM129A" "GALNS"   "IFIT1"   "IFIT2"   "IFIT3"  
## [15] "NBEAL2"  "NRROS"   "PHF23"   "POGLUT1" "ZEB2"    "ZNF276"

## [1] 31
##  [1] "RNASEK"   "ARHGAP10" "BCL9L"    "CARD11"   "CMIP"     "CROT"    
##  [7] "DENND4B"  "EIF2AK4"  "GPRIN3"   "IFI35"    "KIF21A"   "KIF21B"  
## [13] "KLHDC4"   "LY6E"     "PLA2G16"  "PRDM2"    "PREX1"    "SBNO2"   
## [19] "SCAF8"    "SLA"

## [1] 32
##  [1] "AC004687.1" "AC027644.3" "AC097376.2" "AL359220.1" "CASP4"     
##  [6] "COQ10A"     "GOLGA8B"    "GZMK"       "KCNK6"      "MATR3-1"   
## [11] "MUC20-OT1"  "RGS1"       "TRAV12-2"   "TRAV25"     "TRAV41"    
## [16] "TRBV6-5"    "WASHC4"     "ZNF600"     "ZNF91"      "FRMD4B"    
## [21] "SPOCK2"

## [1] 33
##  [1] "ACADSB"   "C3orf58"  "COL18A1"  "DAPP1"    "FAM153C"  "INTS6"   
##  [7] "IP6K2"    "MYLIP"    "NABP1"    "NEK1"     "NR1D1"    "NR1D2"   
## [13] "PLEKHM1"  "RCSD1"    "SLC26A11" "TECPR1"   "DOCK11"   "LPCAT4"  
## [19] "PPRC1"

## [1] 34
##  [1] "BTG1"    "BTG2"    "C1orf43" "EGR1"    "FBXL3"   "LCLAT1"  "TCF7"   
##  [8] "ATP8B2"  "CHD6"    "ETV6"    "FBXO9"   "IFNGR2"  "KIF3B"   "NEAT1"  
## [15] "PCED1B"  "PIEZO1"  "RHOH"    "TBC1D14" "ZNF708"

## [1] 35
##  [1] "ODF2L"  "UTP6"   "YPEL5"  "ANKAR"  "DTX3L"  "FAM53B" "GBP1"   "GBP3"  
##  [9] "GBP5"   "GNLY"   "HSH2D"  "IFI44L" "IRF1"   "IRF9"   "MX1"    "MYO1F" 
## [17] "NKG7"   "PARP14" "PARP9"  "XAF1"

## [1] 36
##  [1] "AC013264.1" "ACSS1"      "AL138963.3" "ANXA2R"     "C7orf31"   
##  [6] "CFAP36"     "LINC00623"  "METAP1"     "METTL21A"   "SNHG8"     
## [11] "SP3"        "STK19"      "TRAV12-1"   "TRAV13-2"   "TRAV21"    
## [16] "TRAV8-3"    "TRAV9-2"    "TRBV28"     "TRBV6-2"    "ZFAS1"     
## [21] "ZNF749"     "C1orf162"

## [1] 37
##  [1] "ARHGAP15"  "ATG13"     "CD40LG"    "DPYD"      "ERAP2"     "HOXB2"    
##  [7] "IGKV3-20"  "LAX1"      "SGSM3"     "SLC7A6"    "TCP11L2"   "TGIF1"    
## [13] "TRAV14DV4" "TRBC1"     "TTC39C"    "UBL3"      "XRRA1"     "CARD8-AS1"
## [19] "MT2A"      "OAS1"

## [1] 38
##  [1] "AC004854.2" "AC015982.1" "AC023157.3" "AC083880.1" "AC087623.3"
##  [6] "ARF4-AS1"   "BX284668.6" "HIPK1-AS1"  "JCHAIN"     "KCNQ1OT1"  
## [11] "LETM2"      "LINC00649"  "MZF1-AS1"   "NOCT"       "NPIPB11"   
## [16] "NPIPB5"     "PHLDA1"     "SDR42E2"    "THAP9-AS1"  "TOX"       
## [21] "MYBL1"

## [1] 39
##  [1] "ABCC10"    "ADGRG1"    "AKNA"      "CCL4"      "CD300A"    "CTSW"     
##  [7] "FCRL6"     "FGFBP2"    "GPR65"     "MIAT"      "PAXX"      "RASAL3"   
## [13] "S1PR5"     "TRANK1"    "TRAV29DV5" "TRBV12-3"  "TSPAN32"   "TTC38"    
## [19] "USP30-AS1" "ZNF683"

## [1] 40
##  [1] "ARMH1"    "COA1"     "LRMP"     "RAB33B"   "RNF139"   "TRAV8-2" 
##  [7] "ZSCAN18"  "APH1B"    "CITED4"   "COL6A3"   "COX17"    "HELB"    
## [13] "RNF157"   "RTKN2"    "SESN3"    "SLC23A2"  "SLC25A37" "SLC9A8"  
## [19] "TAF4B"    "ZNF267"

for(n1 in names(goseq_res)){
  k = as.numeric(gsub("set_", "", n1))
  print(n1)
  print(gene_sets[[k]])
  print(goseq_res[[n1]])

}
## [1] "set_7"
##  [1] "COQ7"    "IER3"    "MZF1"    "ADGRE5"  "ARL4C"   "C1orf21" "CFD"    
##  [8] "CST7"    "CX3CR1"  "DIAPH2"  "ETNK1"   "GZMM"    "MCTP2"   "NCBP3"  
## [15] "PLEK"    "PPP2R5B" "PTGDR"   "PTPN7"   "RNPEPL1" "TTC16"  
## $immune
##                                                    category
## 717 gse15735 ctrl vs hdac inhibitor treated cd4 tcell 2h dn
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 717            1.526635e-05                0.9999994          5       33
##         FDR
## 717 0.07795
## 
## [1] "set_9"
##  [1] "ANKH"     "CSRNP1"   "DHRS3"    "EPHX2"    "KANSL2"   "OXLD1"   
##  [7] "PECAM1"   "ZNF10"    "C12orf75" "GON4L"    "IL4R"     "INPP5D"  
## [13] "KMT2B"    "NARF"     "PHF20"    "PHF20L1"  "SP140"    "TNFRSF4" 
## $reactome
##                                                     category
## 352 formation of wdr5 containing histone modifying complexes
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 352            4.450854e-06                0.9999999          4       16
##             FDR
## 352 0.005345475
## 
## $go_bp
##                                     category over_represented_pvalue
## 1112               histone h3 k4 methylation            1.374380e-05
## 1115                    histone modification            3.378768e-05
## 1114                     histone methylation            3.485187e-05
## 2369             peptidyl lysine methylation            6.151068e-05
## 1113                  histone h4 acetylation            6.798180e-05
## 1211 internal protein amino acid acetylation            7.137034e-05
## 2370            peptidyl lysine modification            7.611393e-05
## 2367             peptidyl lysine acetylation            1.044145e-04
## 3147                     protein methylation            1.278195e-04
##      under_represented_pvalue numDEInCat numInCat        FDR
## 1112                0.9999999          3        8 0.05200756
## 1115                0.9999992          4       29 0.05200756
## 1114                0.9999997          3       10 0.05200756
## 2369                0.9999992          3       12 0.05200756
## 1113                1.0000000          2        2 0.05200756
## 1211                0.9999991          3       12 0.05200756
## 2370                0.9999977          4       36 0.05200756
## 2367                0.9999984          3       14 0.06242683
## 3147                0.9999979          3       15 0.06792894
## 
## [1] "set_11"
##  [1] "BCL3"      "C20orf204" "DDX3Y"     "EIF1AY"    "FAAH2"     "HRH2"     
##  [7] "KDM5D"     "OSM"       "PCSK1N"    "RPS4Y1"    "S100A12"   "SLF2"     
## [13] "SOCS3"     "TTTY15"    "UTY"       "ZFYVE28"  
## $immune
##                                                  category
## 4353 gse5099 classical m1 vs alternative m2 macrophage dn
## 3753               gse39820 ctrl vs il1b il6 cd4 tcell dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4353            1.764486e-05                0.9999996          4       21
## 3753            2.103421e-05                0.9999992          5       48
##             FDR
## 4353 0.05370035
## 3753 0.05370035
## 
## [1] "set_13"
##  [1] "DELE1"    "DSE"      "GIMAP1"   "IER5"     "KLRB1"    "PITPNC1" 
##  [7] "RAB37"    "SLC25A33" "SNX18"    "TBC1D10C" "TRBC2"    "TTC31"   
## [13] "UCP2"     "APOBEC3G" "GNPTAB"   "GZMB"     "KLRG1"    "LAG3"    
## [19] "MATK"     "SMPD2"   
## $immune
##                                                category over_represented_pvalue
## 4253 gse45739 unstim vs acd3 acd28 stim wt cd4 tcell dn            1.584655e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 4253                0.9999991          6       61 0.08091246
## 
## [1] "set_22"
##  [1] "CBLL1"    "CST3"     "FAM118A"  "FCN1"     "GIMAP8"   "IGKV1-5" 
##  [7] "IGLV1-44" "NT5DC1"   "PYROXD1"  "RABL2B"   "ALOX5AP"  "GAB3"    
## [13] "IFI44"    "KIAA1551" "MX2"      "OAS2"     "PPP2R3C"  "SRP54"   
## [19] "STK10"   
## $reactome
##                             category over_represented_pvalue
## 187 creation of c4 and c2 activators            1.256826e-05
## 460 initial triggering of complement            3.204746e-05
## 171               complement cascade            6.474635e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 187                0.9999999          3        6 0.01509448
## 460                0.9999997          3        8 0.01924450
## 171                0.9999992          3       10 0.02592012
## 
## [1] "set_23"
##  [1] "RBKS"       "SUPT20H"    "AC116407.2" "APOL6"      "ARHGEF3"   
##  [6] "ATXN7L3B"   "C4orf48"    "CARD16"     "ENY2"       "GIMAP7"    
## [11] "LPIN1"      "LPIN2"      "MESD"       "PDE4B"      "PHPT1"     
## [16] "PTGER2"     "RCBTB2"     "RNF19A"     "TCAF2"      "TMEM156"   
## $reactome
##                       category over_represented_pvalue under_represented_pvalue
## 1170 triglyceride biosynthesis             5.64769e-05                1.0000000
## 1073           synthesis of pe             1.42343e-04                0.9999997
##      numDEInCat numInCat        FDR
## 1170          2        2 0.06782876
## 1073          2        3 0.08547700
## 
## [1] "set_24"
##  [1] "AL118516.1" "KIF9"       "RGS10"      "UPF3A"      "BUD23"     
##  [6] "CD69"       "CYTIP"      "DDIT4"      "EFR3A"      "GIMAP4"    
## [11] "IFITM2"     "ISG20"      "KLF6"       "MTRNR2L8"   "NAA38"     
## [16] "NADSYN1"    "NBDY"       "SIGIRR"     "SMIM37"     "STK17B"    
## $immune
##                                                 category
## 4673 gse7548 naive vs day7 pcc immunization cd4 tcell dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4673            9.748277e-06                0.9999995          6       65
##            FDR
## 4673 0.0497747
## 
## [1] "set_25"
##  [1] "CD38"      "IGLV2-14"  "KIAA1328"  "MBNL2"     "RPS26"     "SIMC1"    
##  [7] "XIST"      "APOL1"     "CCL5"      "CCR4"      "GZMA"      "LINC01871"
## [13] "MYO1G"     "RAP1GAP2"  "RSAD2"     "SAMD9L"    "SPON2"     "TGFBR3"   
## [19] "VCAN"     
## $immune
##                                        category over_represented_pvalue
## 3623 gse3982 cent memory cd4 tcell vs nkcell dn            4.156056e-06
##      under_represented_pvalue numDEInCat numInCat        FDR
## 3623                0.9999999          5       35 0.02122082
## 
## [1] "set_28"
##  [1] "CD28"    "KLF10"   "ODC1"    "SH3YL1"  "AGAP2"   "AP3M2"   "AREG"   
##  [8] "CRIP2"   "CRTC3"   "EDA"     "GABPB2"  "ICOS"    "IL21R"   "KHNYN"  
## [15] "LTBP3"   "LY96"    "NFKBIZ"  "PLEKHA2" "SMAD5"   "TOB1"    "ZNF101" 
## $reactome
##                                              category over_represented_pvalue
## 174 constitutive signaling by aberrant pi3k in cancer            5.070047e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 174                0.9999995          3        8 0.06089126
## 
## [1] "set_35"
##  [1] "ODF2L"  "UTP6"   "YPEL5"  "ANKAR"  "DTX3L"  "FAM53B" "GBP1"   "GBP3"  
##  [9] "GBP5"   "GNLY"   "HSH2D"  "IFI44L" "IRF1"   "IRF9"   "MX1"    "MYO1F" 
## [17] "NKG7"   "PARP14" "PARP9"  "XAF1"  
## $reactome
##                       category over_represented_pvalue under_represented_pvalue
## 478       interferon signaling            8.827006e-06                0.9999995
## 477 interferon gamma signaling            1.475343e-05                0.9999995
##     numDEInCat numInCat         FDR
## 478          7       88 0.008859437
## 477          5       35 0.008859437
## 
## $go_bp
##                                                    category
## 623                            defense response to symbiont
## 486                 cellular response to type ii interferon
## 4273                         response to type ii interferon
## 693  disruption of anatomical structure in another organism
## 4279                                      response to virus
## 4683          type ii interferon mediated signaling pathway
## 370                                            cell killing
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 623             3.728908e-08                1.0000000          9       81
## 486             1.089531e-07                1.0000000          6       25
## 4273            3.544879e-07                1.0000000          6       30
## 693             5.204098e-07                1.0000000          5       17
## 4279            5.741523e-07                1.0000000          9      110
## 4683            6.044950e-05                0.9999993          3        8
## 370             7.374768e-05                0.9999961          5       44
##               FDR
## 623  0.0001783537
## 486  0.0002605613
## 4273 0.0005492341
## 693  0.0005492341
## 4279 0.0005492341
## 4683 0.0481883286
## 370  0.0503907347
## 
## $immune
##                                                                               category
## 4082                                            gse42724 naive bcell vs plasmablast up
## 473                                                    gse14000 unstim vs 4h lps dc dn
## 1353                                       gse18791 unstim vs newcatsle virus dc 6h dn
## 4971 howard pbmc inact monov influenza a indonesia 05 2005 h5n1 age 19 39yo as03 adjuv
## 1456     gse19888 adenosine a3r inh vs act with inhibitor pretreatment in mast cell up
## 2348                                    gse26030 th1 vs th17 day5 post polarization up
## 4961 howard dendritic cell inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 
## 4965 howard neutrophil inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 1dy 
## 4972   howard t cell inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 1dy up
## 1343                                         gse18791 ctrl vs newcastle virus dc 8h dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4082            0.000000e+00                        1         11       67
## 473             6.436927e-11                        1         10       62
## 1353            1.664468e-10                        1         10       67
## 4971            1.739134e-10                        1         10       67
## 1456            2.877583e-10                        1          9       49
## 2348            5.031712e-10                        1          9       52
## 4961            7.522654e-10                        1          9       54
## 4965            7.926391e-10                        1         11      104
## 4972            9.454219e-10                        1          7       23
## 1343            2.348254e-09                        1          9       61
##               FDR
## 4082 0.000000e+00
## 473  1.643348e-07
## 1353 2.220005e-07
## 4971 2.220005e-07
## 1456 2.938588e-07
## 2348 4.281987e-07
## 4961 5.059019e-07
## 4965 5.059019e-07
## 4972 5.363694e-07
## 1343 1.199018e-06
## 
## [1] "set_39"
##  [1] "ABCC10"    "ADGRG1"    "AKNA"      "CCL4"      "CD300A"    "CTSW"     
##  [7] "FCRL6"     "FGFBP2"    "GPR65"     "MIAT"      "PAXX"      "RASAL3"   
## [13] "S1PR5"     "TRANK1"    "TRAV29DV5" "TRBV12-3"  "TSPAN32"   "TTC38"    
## [19] "USP30-AS1" "ZNF683"   
## $immune
##                                                     category
## 4251 gse45739 unstim vs acd3 acd28 stim nras ko cd4 tcell dn
## 1623     gse21063 wt vs nfatc1 ko 16h anti igm stim bcell dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4251            1.393901e-06                1.0000000          6       41
## 1623            1.408540e-05                0.9999997          4       17
##              FDR
## 4251 0.007117258
## 1623 0.035960021
saveRDS(goseq_res, sprintf("output/gene_set_enrichments_%s.RDS", 
                           file_tag))

Session information

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  8958373 478.5   16391124 875.4         NA 16391124 875.4
## Vcells 19172611 146.3   57790505 441.0      65536 77218972 589.2
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
##  [2] GenomicFeatures_1.50.4                  
##  [3] GenomicRanges_1.50.2                    
##  [4] GenomeInfoDb_1.34.9                     
##  [5] org.Hs.eg.db_3.16.0                     
##  [6] AnnotationDbi_1.60.2                    
##  [7] IRanges_2.32.0                          
##  [8] S4Vectors_0.36.2                        
##  [9] Biobase_2.58.0                          
## [10] BiocGenerics_0.44.0                     
## [11] goseq_1.50.0                            
## [12] geneLenDataBase_1.34.0                  
## [13] BiasedUrn_2.0.10                        
## [14] fgsea_1.24.0                            
## [15] biomaRt_2.54.1                          
## [16] limma_3.54.2                            
## [17] tidyr_1.3.0                             
## [18] ggpubr_0.6.0                            
## [19] ggplot2_3.4.2                           
## [20] data.table_1.14.8                       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-162                matrixStats_1.0.0          
##  [3] bitops_1.0-7                bit64_4.0.5                
##  [5] filelock_1.0.2              progress_1.2.2             
##  [7] httr_1.4.6                  tools_4.2.3                
##  [9] backports_1.4.1             bslib_0.4.2                
## [11] utf8_1.2.3                  R6_2.5.1                   
## [13] mgcv_1.8-42                 DBI_1.1.3                  
## [15] colorspace_2.1-0            withr_2.5.0                
## [17] tidyselect_1.2.0            prettyunits_1.1.1          
## [19] bit_4.0.5                   curl_5.0.1                 
## [21] compiler_4.2.3              cli_3.6.1                  
## [23] xml2_1.3.4                  DelayedArray_0.24.0        
## [25] rtracklayer_1.58.0          sass_0.4.5                 
## [27] scales_1.2.1                rappdirs_0.3.3             
## [29] Rsamtools_2.14.0            stringr_1.5.0              
## [31] digest_0.6.31               rmarkdown_2.21             
## [33] XVector_0.38.0              pkgconfig_2.0.3            
## [35] htmltools_0.5.5             MatrixGenerics_1.10.0      
## [37] dbplyr_2.3.2                fastmap_1.1.1              
## [39] rlang_1.1.0                 rstudioapi_0.14            
## [41] RSQLite_2.3.1               BiocIO_1.8.0               
## [43] jquerylib_0.1.4             generics_0.1.3             
## [45] jsonlite_1.8.4              BiocParallel_1.32.6        
## [47] dplyr_1.1.2                 car_3.1-2                  
## [49] RCurl_1.98-1.12             magrittr_2.0.3             
## [51] GO.db_3.16.0                GenomeInfoDbData_1.2.9     
## [53] Matrix_1.6-4                Rcpp_1.0.10                
## [55] munsell_0.5.0               fansi_1.0.4                
## [57] abind_1.4-5                 lifecycle_1.0.3            
## [59] stringi_1.7.12              yaml_2.3.7                 
## [61] carData_3.0-5               SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0             BiocFileCache_2.6.1        
## [65] grid_4.2.3                  blob_1.2.4                 
## [67] parallel_4.2.3              crayon_1.5.2               
## [69] lattice_0.20-45             splines_4.2.3              
## [71] Biostrings_2.66.0           cowplot_1.1.1              
## [73] hms_1.1.3                   KEGGREST_1.38.0            
## [75] knitr_1.44                  pillar_1.9.0               
## [77] rjson_0.2.21                ggsignif_0.6.4             
## [79] codetools_0.2-19            fastmatch_1.1-3            
## [81] XML_3.99-0.14               glue_1.6.2                 
## [83] evaluate_0.20               png_0.1-8                  
## [85] vctrs_0.6.2                 gtable_0.3.3               
## [87] purrr_1.0.1                 cachem_1.0.7               
## [89] xfun_0.39                   broom_1.0.4                
## [91] restfulr_0.0.15             rstatix_0.7.2              
## [93] tibble_3.2.1                GenomicAlignments_1.34.1   
## [95] memoise_2.0.1